/**********************************************************************/
/*
   Authors: Robbie Dulin, Clotaire Boyer
   Created: 29 April 2022
   Description: Creates analysis-prepped datasets for FI analysis.
*/
/**********************************************************************/

* Close any log left open by an earlier run, then open a fresh text log
* whose file name starts with the current date formatted as %tdCYND.
capture log close
local prefix : display %tdCYND td(`c(current_date)')
log using "$log/`prefix'_lasso_prep_finance", text replace

* Directory holding the cleaned finance datasets.
global finance_data "$cleaned/finance"

/*----------------------------------------------------*/
            /* Section: SUSENAS Finance */
/*----------------------------------------------------*/

* Merge type used for the UDB merge, one local per finance file.
* The matching local is looked up by file name inside the inner loop.
local merge_type_finance_treats = "1:1"
local merge_type_finance_ind    = "m:1"

foreach survey in mar19 {

	** Village agent data
	* This file depends only on the survey wave, so the tempfile is built
	* once per survey here instead of once per finance file in the loop below.
	if "`survey'" == "sep18" {
		* the sep18 wave uses the aug18 village file
		u "$cleaned/finance/aug18_village_IE_treats.dta", clear
	}
	else {
		u "$cleaned/finance/`survey'_village_IE_treats.dta", clear
	}
	* numeric copies of the location codes, named to match the R101-R104 merge keys
	qui destring Kode_Provinsi, gen(R101)
	qui destring Kode_Kabupaten, gen(R102)
	qui destring Kode_Kecamatan, gen(R103)
	qui destring Kode_Desa, gen(R104)
	keep R101 R102 R103 R104 *mar18 beneficiaries num_agents_bpnt num_agents_total agent_ratio_bpnt agent_ratio_total at_least_2_bpnt at_least_2_total
	tempfile village_agent
	qui save `village_agent'

	foreach data in finance_treats finance_ind {

		di "`survey': `data'"
		u "$cleaned/finance/susenas_`survey'_`data'.dta", clear

		** Baseline susenas pools (2016-2018) for lasso controls
		* Every row must match each baseline pool: the assert stops the run
		* if any master row is unmatched, and only matched rows are kept.
		gen id_jan14 = KABU
		qui forvalues year = 2016 / 2018 {
			merge m:1 id_jan14 urban using "$cleaned/mar_`year'_baseline_pool"
			assert _m != 1
			keep if _m == 3
			drop _m
		}
		drop id_jan14

		** UDB lasso pool
		* The merge key and the using file differ by survey wave.
		if "`survey'" == "mar19" {
			* mar 19
			rename RENUM renum
			merge `merge_type_`data'' renum using "$cleaned/mar_2019_udb_lasso_pool.dta"
		}
		else {
			* sep 18
			rename URUT2 urut2
			merge `merge_type_`data'' urut2 using "$cleaned/sep_2018_udb_lasso_pool.dta"
		}

		assert _m != 1
		keep if _m == 3
		drop _m

		** PMT score cutoffs
		* pmtX = 1 when percentile_udb is at or below X and percentile_m_udb is 0.
		foreach cut in 5 10 15 20 25 30  {
			gen pmt`cut' = percentile_udb <= `cut' & percentile_m_udb == 0
		}
		* Stata treats a missing value as larger than any number, so rows with
		* missing percentile_udb also get above == 1 and pmt50 == 1.
		* Unlike pmt5-pmt30, pmt50 flags percentile_udb ABOVE 50 (or udb == 0).
		gen above = percentile_udb > 30 | udb == 0
		gen all = 1
		gen pmt50 = percentile_udb > 50 | udb == 0

		** PODES
		* Key names are upper-cased when they arrive in lower case; capture
		* lets the rename pass silently when the variable is already R103/R104.
		cap rename r103 R103
		cap rename r104 R104
		merge m:1 R101 R102 R103 R104 using "$cleaned/podes_2018.dta"
		// assert _m != 1
		* unmatched master rows are kept; PODES-only villages are dropped
		drop if _m == 2
		drop _m

		** Village agent data (tempfile prepared at the top of the survey loop)
		merge m:1 R101 R102 R103 R104 using `village_agent'
		drop if _m == 2
		qui count if _m == 1
		di "`r(N)' obs missing village-level agent data"
		drop _m

		* save
		save "$cleaned/finance/susenas_`survey'_`data'_lasso.dta", replace
	}

}





*** Impact eval village dataset
* For each wave: add the PODES 2018 variables to the village-level agent
* dataset, fill the PODES gaps, shorten variable names and save a lasso copy.
foreach data in aug18 mar19 {
	use "$cleaned/finance/`data'_village_IE_treats.dta", clear

	// numeric merge keys R101-R104 built from the four Kode_* variables
	local k = 1
	foreach code in Kode_Provinsi Kode_Kabupaten Kode_Kecamatan Kode_Desa {
		destring `code', generate(R10`k')
		local ++k
	}

	merge 1:1 R101 R102 R103 R104 using "$cleaned/podes_2018.dta"
	drop if _merge == 2
	drop _merge

	* 218 villages not in PODES
	* Their PODES controls are set to 0, and the missing dummies to 1
	foreach var of varlist *podes {
		display "`var'"

		// a control whose name contains miss is a missing dummy and is filled
		// with 1; every other podes control is filled with 0
		local fill = (strpos("`var'", "miss") != 0)
		replace `var' = `fill' if `var' == .
	}

	// log of the beneficiary count
	generate log_beneficiaries = ln(beneficiaries)

	// shorten names: drop the underscore before w, and add w-named copies
	// of the baseline (mar18) agent variables
	rename num_agents_w_bpnt num_agentsw_bpnt
	rename agent_ratio_w_bpnt agent_ratiow_bpnt
	generate num_agentsw_mar18 = num_agents_mar18
	generate agent_ratiow_mar18 = agent_ratio_mar18
	label variable num_agentsw_mar18 "\# Agents In Village (Baseline)"
	label variable agent_ratiow_mar18 "Agents Per Beneficiary (Baseline)"

	// shorter prefixes for the four variable families
	rename num_agents* agents*
	rename agent_ratio* ratio*
	rename both_criteria* both*
	rename at_least_2* min2*

	// numeric, value-labelled version of the bank string variable
	// NOTE(review): encode numbers the values found in each wave file
	// separately - confirm the codes line up before comparing bank across waves
	encode himbara, generate(bank)

	save "$cleaned/finance/`data'_village_IE_treats_lasso.dta", replace
}

*** create pooled dataset
* Stacks the aug18 (wave 0) and mar19 (wave 1) village lasso files, builds a
* common treatment indicator and wave-interacted controls, and saves the result.

* Variables kept from both waves. Defined once so the two keep lists stay in sync.
local pool_vars kodewilayah agents* agents_mar18 agentsw* ratio* ratio_mar18 ratiow* ///
	both* ratio250* min2* *beneficiaries Provinsi Kabupaten idkab *podes wave finalstratum

// wave 1: mar 19 villages, staged in a tempfile for the append below
u "$cleaned/finance/mar19_village_IE_treats_lasso.dta", clear
// summ
gen wave = 1
keep `pool_vars' treated
tempfile mar19_village
save `mar19_village'

// wave 0: aug 18 villages are the master data
// NOTE(review): bank and the other *bank variables are kept for this wave
// only, so they are missing on wave 1 rows after the append - confirm intended
u "$cleaned/finance/aug18_village_IE_treats_lasso.dta", clear
// summ
gen wave = 0
keep `pool_vars' treated_sep18 bank *bank

append using `mar19_village'

// Identify podes variables with missing values after the append, i.e.
// variables that are not present (or not filled) in both waves
local missing_vars
foreach var of varlist *podes {
	cap assert `var' != .
	if _rc != 0 {
		di "`var'"
		local missing_vars `missing_vars' `var'
	}
}
// none missing
// summ *total

* create stack treat
* Treatment comes from treated_sep18 on wave 0 rows and from treated on wave 1 rows.
gen stack_treat = (treated_sep18 == 1) if wave == 0
replace stack_treat = (treated == 1) if wave == 1
// tab stack_treat treated_sep18 if wave == 0
// tab stack_treat treated if wave == 1

* Create wave-interacted variables
rename *_podes podes_*
ds podes*

foreach var of varlist *mar18 podes* *beneficiaries {
	// Variable names are capped at 32 characters, so the interaction is named
	// with at most the first 31 characters of the original name plus W
	local stem = substr("`var'", 1, 31)
	gen `stem'W = wave * `var'
	if "`stem'" != "`var'" {
		di "`var' wave interaction named `stem'W"
	}
}

// list any lasso pool variable that still has missing values
// (the failed assert is captured, so this reports without stopping the run)
foreach var of varlist *mar18 podes* *beneficiaries *W {
	cap assert `var' != .
	if _rc != 0 {
		di "`var'"
	}
}

save "$cleaned/finance/stacked_village_IE_treats_lasso.dta", replace



/*----------------------------------------------------*/
            /* Section: PODES 2019 Finance */
/*----------------------------------------------------*/

* Keep the mar19 village sample rows that also appear in PODES 2019 and
* save the matched villages.
use "$finance_data/mar19_village_IE_treats_lasso.dta", clear

* NOTE(review): without update or replace options, merge keeps the master
* value for any variable name shared with the using file - confirm that the
* PODES 2019 variables do not share names with variables already loaded here.
merge 1:1 R101 R102 R103 R104 using "$cleaned/podes_2019.dta"
drop if _merge != 3
drop _merge

// Disabled: merge in 2018 info for controls
// drop _merge
// merge 1:1 R101 R102 R103 R104 using "$cleaned/podes_2018.dta"
// drop if _merge == 2
// drop _merge

save "$cleaned/podes_2019_oursample.dta", replace

capture log close
